library(ggplot2)

setwd("/Users/ab6415/Documents/Work_laptop_stuff/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_FigS3_FigS4_precursors_analysis/length/")

piRNA_counts_wholeanimal_E1 <- list.files(path="../counts/wholeworm/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1<-paste0("../counts/wholeworm/",piRNA_counts_wholeanimal_E1,sep="")
piRNA_counts_wholeanimal_E1_longreads <- list.files(path="../counts/wholeworm_longreads/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1_longreads<-paste0("../counts/wholeworm_longreads/",piRNA_counts_wholeanimal_E1_longreads,sep="")
piRNA_counts_germnuclei_total <- list.files(path="../counts/germnuclei_total/",
    pattern="*pis"); piRNA_counts_germnuclei_total<-paste0("../counts/germnuclei_total/",piRNA_counts_germnuclei_total,sep="")
piRNA_counts_germnuclei_fractionation <- list.files(path="../counts/germnuclei_fractionation_rep1/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation<-paste0("../counts/germnuclei_fractionation_rep1/",piRNA_counts_germnuclei_fractionation,sep="")
piRNA_counts_germnuclei_fractionation_rep2 <- list.files(path="../counts/germnuclei_fractionation_rep2/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation_rep2<-paste0("../counts/germnuclei_fractionation_rep2/",piRNA_counts_germnuclei_fractionation_rep2,sep="")


piRNA_counts_filenames<-c(piRNA_counts_wholeanimal_E1,piRNA_counts_wholeanimal_E1_longreads,piRNA_counts_germnuclei_total,piRNA_counts_germnuclei_fractionation,piRNA_counts_germnuclei_fractionation_rep2)
piRNA_counts_filenames<-piRNA_counts_filenames[c(1:13,19:21,14:18,27:30,22,23,25,26)]

names <-c("wholeworm_EV_col1","wholeworm_ints11_col1","wholeworm_EV_col2","wholeworm_ints11_col2","wholeworm_EV_96h","wholeworm_ints11_96h","wholeworm_EV_col1_longreads","wholeworm_ints11_col1_longreads","wholeworm_ints11_col2_longreads","germnuclei_total_N2_EV","germnuclei_total_N2_ints11","germnuclei_total_TFIIS_EV","germnuclei_total_TFIIS_ints11","germnuclei_NP_N2_EV","germnuclei_NP_N2_ints11","germnuclei_NP_TFIIS_EV","germnuclei_NP_TFIIS_ints11","germnuclei_CHR_N2_EV","germnuclei_CHR_N2_ints11","germnuclei_CHR_TFIIS_EV","germnuclei_CHR_TFIIS_ints11","germnuclei_rep2_NP_N2_EV","germnuclei_rep2_NP_N2_ints11","germnuclei_rep2_NP_TFIIS_EV","germnuclei_rep2_NP_TFIIS_ints11","germnuclei_rep2_CHR_N2_EV","germnuclei_rep2_CHR_N2_ints11","germnuclei_rep2_CHR_TFIIS_EV","germnuclei_rep2_CHR_TFIIS_ints11")

piRNA_counts_filenames
##  [1] "../counts/wholeworm/Sample_1_EV_col1.fastq.trimmed.sorted.bed.pis"                                          
##  [2] "../counts/wholeworm/Sample_3_ints_11_col1.fastq.trimmed.sorted.bed.pis"                                     
##  [3] "../counts/wholeworm/Sample_4_EV_col2.fastq.trimmed.sorted.bed.pis"                                          
##  [4] "../counts/wholeworm/Sample_6_ints_11_col2.fastq.trimmed.sorted.bed.pis"                                     
##  [5] "../counts/wholeworm/Sample_7_EV_col2_96h.fastq.trimmed.sorted.bed.pis"                                      
##  [6] "../counts/wholeworm/Sample_9_ints_11_col2_96h.fastq.trimmed.sorted.bed.pis"                                 
##  [7] "../counts/wholeworm_longreads/1_EV_col1_S1_R1_001.fastq.trimmed.sorted.bed.pis"                             
##  [8] "../counts/wholeworm_longreads/3_ints-11_col1_S2_R1_001.fastq.trimmed.sorted.bed.pis"                        
##  [9] "../counts/wholeworm_longreads/9_ints-11_col2_96h_S3_R1_001.fastq.trimmed.sorted.bed.pis"                    
## [10] "../counts/germnuclei_total/Sample_13_N2_EV_CR_S63_L007_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [11] "../counts/germnuclei_total/Sample_14_N2_Ints11_CR_S64_L007_R1_001.fastq.trimmed.sorted.bed.pis"             
## [12] "../counts/germnuclei_total/Sample_15_TFIIS_EV_CR_S65_L007_R1_001.fastq.trimmed.sorted.bed.pis"              
## [13] "../counts/germnuclei_total/Sample_16_TFIIS_Ints11_CR_S66_L007_R1_001.fastq.trimmed.sorted.bed.pis"          
## [14] "../counts/germnuclei_fractionation_rep1/7_N2_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [15] "../counts/germnuclei_fractionation_rep1/8_N2_ints-11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"         
## [16] "../counts/germnuclei_fractionation_rep1/9_TFIIS_EV_np_S9_R1_001.fastq.trimmed.sorted.bed.pis"               
## [17] "../counts/germnuclei_fractionation_rep1/10_TFIIS_ints-11RNAi_np_S10_R1_001.fastq.trimmed.sorted.bed.pis"    
## [18] "../counts/germnuclei_fractionation_rep1/25_N2_EV_chr_S11_R1_001.fastq.trimmed.sorted.bed.pis"               
## [19] "../counts/germnuclei_fractionation_rep1/26_N2_ints-11RNAi_chr_S12_R1_001.fastq.trimmed.sorted.bed.pis"      
## [20] "../counts/germnuclei_fractionation_rep1/27_TFIIS_EV_chr_S13_R1_001.fastq.trimmed.sorted.bed.pis"            
## [21] "../counts/germnuclei_fractionation_rep1/28_TFIIS_ints-11RNAi_chr_S14_R1_001.fastq.trimmed.sorted.bed.pis"   
## [22] "../counts/germnuclei_fractionation_rep2/5_N2_EV_np_S5_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [23] "../counts/germnuclei_fractionation_rep2/6_N2_ints11RNAi_np_S6_R1_001.fastq.trimmed.sorted.bed.pis"          
## [24] "../counts/germnuclei_fractionation_rep2/7_TFIIS_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"               
## [25] "../counts/germnuclei_fractionation_rep2/8_TFIIS_ints11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"       
## [26] "../counts/germnuclei_fractionation_rep2/1_N2_EV_chr_S1_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [27] "../counts/germnuclei_fractionation_rep2/13_N2_ints11RNAi_chr_techrep_S9_R1_001.fastq.trimmed.sorted.bed.pis"
## [28] "../counts/germnuclei_fractionation_rep2/3_TFIIS_EV_chr_S3_R1_001.fastq.trimmed.sorted.bed.pis"              
## [29] "../counts/germnuclei_fractionation_rep2/4_TFIIS_ints11RNAi_chr_S4_R1_001.fastq.trimmed.sorted.bed.pis"
names         
##  [1] "wholeworm_EV_col1"                "wholeworm_ints11_col1"           
##  [3] "wholeworm_EV_col2"                "wholeworm_ints11_col2"           
##  [5] "wholeworm_EV_96h"                 "wholeworm_ints11_96h"            
##  [7] "wholeworm_EV_col1_longreads"      "wholeworm_ints11_col1_longreads" 
##  [9] "wholeworm_ints11_col2_longreads"  "germnuclei_total_N2_EV"          
## [11] "germnuclei_total_N2_ints11"       "germnuclei_total_TFIIS_EV"       
## [13] "germnuclei_total_TFIIS_ints11"    "germnuclei_NP_N2_EV"             
## [15] "germnuclei_NP_N2_ints11"          "germnuclei_NP_TFIIS_EV"          
## [17] "germnuclei_NP_TFIIS_ints11"       "germnuclei_CHR_N2_EV"            
## [19] "germnuclei_CHR_N2_ints11"         "germnuclei_CHR_TFIIS_EV"         
## [21] "germnuclei_CHR_TFIIS_ints11"      "germnuclei_rep2_NP_N2_EV"        
## [23] "germnuclei_rep2_NP_N2_ints11"     "germnuclei_rep2_NP_TFIIS_EV"     
## [25] "germnuclei_rep2_NP_TFIIS_ints11"  "germnuclei_rep2_CHR_N2_EV"       
## [27] "germnuclei_rep2_CHR_N2_ints11"    "germnuclei_rep2_CHR_TFIIS_EV"    
## [29] "germnuclei_rep2_CHR_TFIIS_ints11"
piRNA_counts <- lapply(piRNA_counts_filenames,function(i){
  read.csv(paste(i,sep=""), header=FALSE,sep="\t")
})


piRNA_counts_precfiltered <- lapply(piRNA_counts, function(df){
 df$plus_d <- df$V10-df$V2
 df$minus_d <- df$V3-df$V11
 df_plus<-df[which(df$plus_d==2 & df$V8=="+"),]
 df_minus<-df[which(df$minus_d==(2) & df$V8=="-"),]
 return(rbind(df_plus,df_minus))
})

motifdep_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[c(grep("^21UR-",df$V12),grep("^piRNA_type_1",df$V12)),])})
motifind_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[grep("^piRNA_type_2",df$V12),])})

N2_EV_np<-rbind(motifdep_piRNA_precursors[[14]],motifdep_piRNA_precursors[[22]])
N2_EV_np<-aggregate(N2_EV_np[,"V6"],by=N2_EV_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
N2_ints11_np<-rbind(motifdep_piRNA_precursors[[15]],motifdep_piRNA_precursors[[23]])
N2_ints11_np<-aggregate(N2_ints11_np[,"V6"],by=N2_ints11_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_EV_np<-rbind(motifdep_piRNA_precursors[[16]],motifdep_piRNA_precursors[[24]])
tfiis_EV_np<-aggregate(tfiis_EV_np[,"V6"],by=tfiis_EV_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_ints11_np<-rbind(motifdep_piRNA_precursors[[17]],motifdep_piRNA_precursors[[25]])
tfiis_ints11_np<-aggregate(tfiis_ints11_np[,"V6"],by=tfiis_ints11_np[c("V1","V2","V3","V4","V5","V12")],FUN=sum)

N2_EV_chr<-rbind(motifdep_piRNA_precursors[[18]],motifdep_piRNA_precursors[[26]])
N2_EV_chr<-aggregate(N2_EV_chr[,"V6"],by=N2_EV_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
N2_ints11_chr<-rbind(motifdep_piRNA_precursors[[19]],motifdep_piRNA_precursors[[27]])
N2_ints11_chr<-aggregate(N2_ints11_chr[,"V6"],by=N2_ints11_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_EV_chr<-rbind(motifdep_piRNA_precursors[[20]],motifdep_piRNA_precursors[[28]])
tfiis_EV_chr<-aggregate(tfiis_EV_chr[,"V6"],by=tfiis_EV_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)
tfiis_ints11_chr<-rbind(motifdep_piRNA_precursors[[21]],motifdep_piRNA_precursors[[29]])
tfiis_ints11_chr<-aggregate(tfiis_ints11_chr[,"V6"],by=tfiis_ints11_chr[c("V1","V2","V3","V4","V5","V12")],FUN=sum)

motifdep_piRNA_precursors_agg<-list(N2_EV_np,N2_ints11_np,tfiis_EV_np,tfiis_ints11_np,
                                    N2_EV_chr,N2_ints11_chr,tfiis_EV_chr,tfiis_ints11_chr)

names_agg<-c("N2_EV_np","N2_ints11_np","tfiis_EV_np","tfiis_ints11_np",
             "N2_EV_chr","N2_ints11_chr","tfiis_EV_chr","tfiis_ints11_chr")

Locus-by-locus analysis

Median length shift

library(reshape2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
motifdep_piRNA_precursors_data_summary<-lapply(motifdep_piRNA_precursors_agg,function(df){
  number_seqs<-aggregate(df[,"V12"],by=list(df[,"V12"]),FUN=length)
  median_length<-aggregate(df[,"V4"],by=list(df[,"V12"]),FUN=median)
  mean_length<-aggregate(df[,"V4"],by=list(df[,"V12"]),FUN=mean)
  
  number_reads<-aggregate(df[,"x"],by=list(df[,"V12"]),FUN=sum)
  df_uncol<-df[rep(row.names(df), df$x),]
  median_length_numreads<-aggregate(df_uncol[,"V4"],by=list(df_uncol[,"V12"]),FUN=median)
  mean_length_numreads<-aggregate(df_uncol[,"V4"],by=list(df_uncol[,"V12"]),FUN=mean)

  merged<-data.frame(number_seqs$x,
                   median_length$x,
                   mean_length$x,
                   number_reads$x,
                   median_length_numreads$x,
                   mean_length_numreads$x)
  colnames(merged)<-c("number_seqs","median_length","mean_length","number_reads","median_length_numreads","mean_length_numreads")
  rownames(merged)<-number_seqs$Group.1
  return(merged)
})


merged_df<-motifdep_piRNA_precursors_data_summary[[1]]
colnames(merged_df)<-paste0(names_agg[1],colnames(merged_df))
for (i in seq(2,length(names_agg))){
  df<-motifdep_piRNA_precursors_data_summary[[i]]
  colnames(df)<-paste0(names_agg[i],colnames(df))
  merged_df<-merge(merged_df,df,by=0,all=TRUE)
  rownames(merged_df)<-merged_df$Row.names
  merged_df$Row.names<-NULL
}
merged_df$rownames<-rownames(merged_df)

sample_index_to_medianlength_index<-function(index){return((index-1)*6+2)}

lenshift<-function(df,treatment_index,control_index,numseqs_threshold){
  index1<-sample_index_to_medianlength_index(treatment_index)
  index2<-sample_index_to_medianlength_index(control_index)
  df<-df[which(df[,index1-1]>numseqs_threshold & df[,index2-1]>numseqs_threshold),]
  out<-data.frame(names=as.character(df$rownames),lenshift=df[,index1]-df[,index2])
  out$names<-as.character(out$names)
  return(out)
}

test<-merged_df[sample_index_to_medianlength_index(2)]-merged_df[sample_index_to_medianlength_index(1)]
test<-na.exclude(test$N2_ints11_npmedian_length)
cor(test,lenshift(merged_df,2,1,0)[,2])
## [1] 1
thr<-0
median_lenshift_thr<-data.frame(
                            names<-c(
                            lenshift(merged_df,2,1,thr)[,1],
                            lenshift(merged_df,3,1,thr)[,1],
                            lenshift(merged_df,4,1,thr)[,1],
                            lenshift(merged_df,6,5,thr)[,1],
                            lenshift(merged_df,7,5,thr)[,1],
                            lenshift(merged_df,8,5,thr)[,1]),
                            median_length_shift<-c(
                            lenshift(merged_df,2,1,thr)[,2],
                            lenshift(merged_df,3,1,thr)[,2],
                            lenshift(merged_df,4,1,thr)[,2],
                            lenshift(merged_df,6,5,thr)[,2],
                            lenshift(merged_df,7,5,thr)[,2],
                            lenshift(merged_df,8,5,thr)[,2]),
                            sample<-c(
                            rep(names_agg[2],nrow(lenshift(merged_df,2,1,thr))),
                            rep(names_agg[3],nrow(lenshift(merged_df,3,1,thr))),
                            rep(names_agg[4],nrow(lenshift(merged_df,4,1,thr))),
                            rep(names_agg[6],nrow(lenshift(merged_df,6,5,thr))),
                            rep(names_agg[7],nrow(lenshift(merged_df,7,5,thr))),
                            rep(names_agg[8],nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            fraction<-c(
                            rep("nucleoplasm",nrow(lenshift(merged_df,2,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,3,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,4,1,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,6,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,7,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            condition<-c(
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,2,1,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,3,1,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,4,1,thr))),
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,6,5,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,7,5,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,8,5,thr)))
                            ))

colnames(median_lenshift_thr)<-c("piRNA_IDs","median length shift","sample","fraction","condition")
median_lenshift_thr$sample<-factor(median_lenshift_thr$sample,levels = names_agg)
median_lenshift_thr$condition<-factor(median_lenshift_thr$condition,levels=c("N2 ints-11 RNAi","tfiis EV","tfiis ints-11 RNAi"))
median_lenshift_thr$fraction<-factor(median_lenshift_thr$fraction,levels=c("total","nucleoplasm","chromatin"))

ggplot(median_lenshift_thr)+geom_boxplot(aes(y=`median length shift`,x=sample,col=fraction,fill=condition),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_fill_manual(values=c(alpha("green",0.2),alpha("red",0.2),alpha("blue",0.2)))+
  scale_colour_manual(values=c("red","black"))+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("median length shift")+
  ggtitle("median length shift - collapsed sequences")

ggsave("median_length_shift_collapsedreads_aggreps.pdf")
## Saving 7 x 5 in image
pausing_strength_data<-read.table("../../Reference_pausing_signal_strength/all_pis_typeI_pausingscores.bed",header=TRUE)
pausing_strength_data<-pausing_strength_data[,c(4,10:12)]
pausing_strength_data$V4<-as.character(pausing_strength_data$V4)


median_lenshift_thr_pausingstrength<-merge(median_lenshift_thr,pausing_strength_data,
                                                   by.x="piRNA_IDs",by.y="V4")
median_lenshift_thr_pausingstrength$tm_downstream_bins<-factor(median_lenshift_thr_pausingstrength$tm_downstream_bins,                                                                   levels=c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]","(39.8,101]","(101,412]"))
table(median_lenshift_thr_pausingstrength$tm_downstream_bins)
## 
##  (-713,-94.8] (-94.8,-18.3]  (-18.3,39.8]    (39.8,101]     (101,412] 
##          2117          2017          1929          1921          1530
ggplot(median_lenshift_thr_pausingstrength)+geom_boxplot(aes(y=`median length shift`,x=sample,col=fraction,fill=tm_downstream_bins),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_colour_manual(values=c("red","black"))+
  scale_fill_brewer(palette = "GnBu")+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("median length shift")+
  ggtitle("median length shift - collapsed sequences")

ggsave("median_length_shift_collapsedreads_aggreps_pausingstrength.pdf")
## Saving 7 x 5 in image

Mean length shift

sample_index_to_meanlength_index<-function(index){return((index-1)*6+3)}

lenshift<-function(df,treatment_index,control_index,numseqs_threshold){
  index1<-sample_index_to_meanlength_index(treatment_index)
  index2<-sample_index_to_meanlength_index(control_index)
  df<-df[which(df[,index1-1]>numseqs_threshold & df[,index2-1]>numseqs_threshold),]
  out<-data.frame(names=as.character(df$rownames),lenshift=df[,index1]-df[,index2])
  out$names<-as.character(out$names)
  return(out)
}

thr<-0
thr<-0
median_lenshift_thr<-data.frame(
                            names<-c(
                            lenshift(merged_df,2,1,thr)[,1],
                            lenshift(merged_df,3,1,thr)[,1],
                            lenshift(merged_df,4,1,thr)[,1],
                            lenshift(merged_df,6,5,thr)[,1],
                            lenshift(merged_df,7,5,thr)[,1],
                            lenshift(merged_df,8,5,thr)[,1]),
                            median_length_shift<-c(
                            lenshift(merged_df,2,1,thr)[,2],
                            lenshift(merged_df,3,1,thr)[,2],
                            lenshift(merged_df,4,1,thr)[,2],
                            lenshift(merged_df,6,5,thr)[,2],
                            lenshift(merged_df,7,5,thr)[,2],
                            lenshift(merged_df,8,5,thr)[,2]),
                            sample<-c(
                            rep(names_agg[2],nrow(lenshift(merged_df,2,1,thr))),
                            rep(names_agg[3],nrow(lenshift(merged_df,3,1,thr))),
                            rep(names_agg[4],nrow(lenshift(merged_df,4,1,thr))),
                            rep(names_agg[6],nrow(lenshift(merged_df,6,5,thr))),
                            rep(names_agg[7],nrow(lenshift(merged_df,7,5,thr))),
                            rep(names_agg[8],nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            fraction<-c(
                            rep("nucleoplasm",nrow(lenshift(merged_df,2,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,3,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,4,1,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,6,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,7,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            condition<-c(
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,2,1,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,3,1,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,4,1,thr))),
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,6,5,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,7,5,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,8,5,thr)))
                            ))

colnames(median_lenshift_thr)<-c("piRNA_IDs","mean length shift","sample","fraction","condition")
median_lenshift_thr$sample<-factor(median_lenshift_thr$sample,levels = names_agg)
median_lenshift_thr$condition<-factor(median_lenshift_thr$condition,levels=c("N2 ints-11 RNAi","tfiis EV","tfiis ints-11 RNAi"))
median_lenshift_thr$fraction<-factor(median_lenshift_thr$fraction,levels=c("total","nucleoplasm","chromatin"))

ggplot(median_lenshift_thr)+geom_boxplot(aes(y=`mean length shift`,x=sample,col=fraction,fill=condition),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_fill_manual(values=c(alpha("green",0.2),alpha("red",0.2),alpha("blue",0.2)))+
  scale_colour_manual(values=c("red","black"))+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("mean length shift")+
  ggtitle("mean length shift - collapsed sequences")

ggsave("mean_length_shift_collapsedreads_aggreps.pdf")
## Saving 7 x 5 in image
median_lenshift_thr_pausingstrength<-merge(median_lenshift_thr,pausing_strength_data,
                                                   by.x="piRNA_IDs",by.y="V4")
median_lenshift_thr_pausingstrength$tm_downstream_bins<-factor(median_lenshift_thr_pausingstrength$tm_downstream_bins,                                                                   levels=c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]","(39.8,101]","(101,412]"))
table(median_lenshift_thr_pausingstrength$tm_downstream_bins)
## 
##  (-713,-94.8] (-94.8,-18.3]  (-18.3,39.8]    (39.8,101]     (101,412] 
##          2117          2017          1929          1921          1530
ggplot(median_lenshift_thr_pausingstrength)+geom_boxplot(aes(y=`mean length shift`,x=sample,col=fraction,fill=tm_downstream_bins),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_colour_manual(values=c("red","black"))+
  scale_fill_brewer(palette = "GnBu")+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("mean length shift")+
  ggtitle("mean length shift - collapsed sequences")

ggsave("mean_length_shift_collapsedreads_aggreps_pausingstrength.pdf")
## Saving 7 x 5 in image
#ints11 alone in NP
index1<-sample_index_to_meanlength_index(1)
index2<-sample_index_to_meanlength_index(2)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 184710, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#tfiis in NP
index1<-sample_index_to_meanlength_index(1)
index2<-sample_index_to_meanlength_index(3)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 179596, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 tfiis in NP
index1<-sample_index_to_meanlength_index(1)
index2<-sample_index_to_meanlength_index(4)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 60802, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 alone in CHR
index1<-sample_index_to_meanlength_index(5)
index2<-sample_index_to_meanlength_index(6)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 518957, p-value = 2.804e-10
## alternative hypothesis: true location shift is not equal to 0
#tfiis in CHR
index1<-sample_index_to_meanlength_index(5)
index2<-sample_index_to_meanlength_index(7)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 1113636, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 tfiis in CHR
index1<-sample_index_to_meanlength_index(5)
index2<-sample_index_to_meanlength_index(8)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 284494, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Median length shift

sample_index_to_medianlengthNumreads_index<-function(index){return((index-1)*6+5)}

lenshift<-function(df,treatment_index,control_index,numseqs_threshold){
  index1<-sample_index_to_medianlengthNumreads_index(treatment_index)
  index2<-sample_index_to_medianlengthNumreads_index(control_index)
  df<-df[which(df[,index1-1]>numseqs_threshold & df[,index2-1]>numseqs_threshold),]
  out<-data.frame(names=as.character(df$rownames),lenshift=df[,index1]-df[,index2])
  out$names<-as.character(out$names)
  return(out)
}


thr<-0
median_lenshift_thr<-data.frame(
                            names<-c(
                            lenshift(merged_df,2,1,thr)[,1],
                            lenshift(merged_df,3,1,thr)[,1],
                            lenshift(merged_df,4,1,thr)[,1],
                            lenshift(merged_df,6,5,thr)[,1],
                            lenshift(merged_df,7,5,thr)[,1],
                            lenshift(merged_df,8,5,thr)[,1]),
                            median_length_shift<-c(
                            lenshift(merged_df,2,1,thr)[,2],
                            lenshift(merged_df,3,1,thr)[,2],
                            lenshift(merged_df,4,1,thr)[,2],
                            lenshift(merged_df,6,5,thr)[,2],
                            lenshift(merged_df,7,5,thr)[,2],
                            lenshift(merged_df,8,5,thr)[,2]),
                            sample<-c(
                            rep(names_agg[2],nrow(lenshift(merged_df,2,1,thr))),
                            rep(names_agg[3],nrow(lenshift(merged_df,3,1,thr))),
                            rep(names_agg[4],nrow(lenshift(merged_df,4,1,thr))),
                            rep(names_agg[6],nrow(lenshift(merged_df,6,5,thr))),
                            rep(names_agg[7],nrow(lenshift(merged_df,7,5,thr))),
                            rep(names_agg[8],nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            fraction<-c(
                            rep("nucleoplasm",nrow(lenshift(merged_df,2,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,3,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,4,1,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,6,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,7,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            condition<-c(
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,2,1,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,3,1,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,4,1,thr))),
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,6,5,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,7,5,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,8,5,thr)))
                            ))

colnames(median_lenshift_thr)<-c("piRNA_IDs","median length shift","sample","fraction","condition")
median_lenshift_thr$sample<-factor(median_lenshift_thr$sample,levels = names_agg)
median_lenshift_thr$condition<-factor(median_lenshift_thr$condition,levels=c("N2 ints-11 RNAi","tfiis EV","tfiis ints-11 RNAi"))
median_lenshift_thr$fraction<-factor(median_lenshift_thr$fraction,levels=c("total","nucleoplasm","chromatin"))

ggplot(median_lenshift_thr)+geom_boxplot(aes(y=`median length shift`,x=sample,col=fraction,fill=condition),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_fill_manual(values=c(alpha("green",0.2),alpha("red",0.2),alpha("blue",0.2)))+
  scale_colour_manual(values=c("red","black"))+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
   ylab("median length shift")+
  ggtitle("median length shift - uncollapsed reads")

ggsave("median_length_shift_uncollapsedreads_aggreps.pdf")
## Saving 7 x 5 in image
median_lenshift_thr_pausingstrength<-merge(median_lenshift_thr,pausing_strength_data,
                                                   by.x="piRNA_IDs",by.y="V4")
median_lenshift_thr_pausingstrength$tm_downstream_bins<-factor(median_lenshift_thr_pausingstrength$tm_downstream_bins,                                                                   levels=c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]","(39.8,101]","(101,412]"))
table(median_lenshift_thr_pausingstrength$tm_downstream_bins)
## 
##  (-713,-94.8] (-94.8,-18.3]  (-18.3,39.8]    (39.8,101]     (101,412] 
##          2117          2017          1929          1921          1530
ggplot(median_lenshift_thr_pausingstrength)+geom_boxplot(aes(y=`median length shift`,x=sample,col=fraction,fill=tm_downstream_bins),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_colour_manual(values=c("red","black"))+
  scale_fill_brewer(palette = "GnBu")+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
   ylab("median length shift")+
  ggtitle("median length shift - uncollapsed reads")

ggsave("median_length_shift_uncollapsedreads_aggreps_pausingstrength.pdf")
## Saving 7 x 5 in image

Mean length shift - uncollapsed reads

sample_index_to_meanlengthNumreads_index<-function(index){return((index-1)*6+6)}

lenshift<-function(df,treatment_index,control_index,numseqs_threshold){
  index1<-sample_index_to_medianlengthNumreads_index(treatment_index)
  index2<-sample_index_to_medianlengthNumreads_index(control_index)
  df<-df[which(df[,index1-1]>numseqs_threshold & df[,index2-1]>numseqs_threshold),]
  out<-data.frame(names=as.character(df$rownames),lenshift=df[,index1]-df[,index2])
  out$names<-as.character(out$names)
  return(out)
}


thr<-0
median_lenshift_thr<-data.frame(
                            names<-c(
                            lenshift(merged_df,2,1,thr)[,1],
                            lenshift(merged_df,3,1,thr)[,1],
                            lenshift(merged_df,4,1,thr)[,1],
                            lenshift(merged_df,6,5,thr)[,1],
                            lenshift(merged_df,7,5,thr)[,1],
                            lenshift(merged_df,8,5,thr)[,1]),
                            median_length_shift<-c(
                            lenshift(merged_df,2,1,thr)[,2],
                            lenshift(merged_df,3,1,thr)[,2],
                            lenshift(merged_df,4,1,thr)[,2],
                            lenshift(merged_df,6,5,thr)[,2],
                            lenshift(merged_df,7,5,thr)[,2],
                            lenshift(merged_df,8,5,thr)[,2]),
                            sample<-c(
                            rep(names_agg[2],nrow(lenshift(merged_df,2,1,thr))),
                            rep(names_agg[3],nrow(lenshift(merged_df,3,1,thr))),
                            rep(names_agg[4],nrow(lenshift(merged_df,4,1,thr))),
                            rep(names_agg[6],nrow(lenshift(merged_df,6,5,thr))),
                            rep(names_agg[7],nrow(lenshift(merged_df,7,5,thr))),
                            rep(names_agg[8],nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            fraction<-c(
                            rep("nucleoplasm",nrow(lenshift(merged_df,2,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,3,1,thr))),
                            rep("nucleoplasm",nrow(lenshift(merged_df,4,1,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,6,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,7,5,thr))),
                            rep("chromatin",nrow(lenshift(merged_df,8,5,thr)))
                            ),
                            condition<-c(
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,2,1,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,3,1,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,4,1,thr))),
                            rep("N2 ints-11 RNAi",nrow(lenshift(merged_df,6,5,thr))),
                            rep("tfiis EV",nrow(lenshift(merged_df,7,5,thr))),
                            rep("tfiis ints-11 RNAi",nrow(lenshift(merged_df,8,5,thr)))
                            ))

colnames(median_lenshift_thr)<-c("piRNA_IDs","mean length shift","sample","fraction","condition")
median_lenshift_thr$sample<-factor(median_lenshift_thr$sample,levels = names_agg)
median_lenshift_thr$condition<-factor(median_lenshift_thr$condition,levels=c("N2 ints-11 RNAi","tfiis EV","tfiis ints-11 RNAi"))
median_lenshift_thr$fraction<-factor(median_lenshift_thr$fraction,levels=c("total","nucleoplasm","chromatin"))

ggplot(median_lenshift_thr)+geom_boxplot(aes(y=`mean length shift`,x=sample,col=fraction,fill=condition),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_fill_manual(values=c(alpha("green",0.2),alpha("red",0.2),alpha("blue",0.2)))+
  scale_colour_manual(values=c("red","black"))+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("mean length shift")+
  ggtitle("mean length shift - uncollapsed reads")

ggsave("mean_length_shift_uncollapsedreads_aggreps.pdf")
## Saving 7 x 5 in image
median_lenshift_thr_pausingstrength<-merge(median_lenshift_thr,pausing_strength_data,
                                                   by.x="piRNA_IDs",by.y="V4")
median_lenshift_thr_pausingstrength$tm_downstream_bins<-factor(median_lenshift_thr_pausingstrength$tm_downstream_bins,                                                                   levels=c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]","(39.8,101]","(101,412]"))
table(median_lenshift_thr_pausingstrength$tm_downstream_bins)
## 
##  (-713,-94.8] (-94.8,-18.3]  (-18.3,39.8]    (39.8,101]     (101,412] 
##          2117          2017          1929          1921          1530
ggplot(median_lenshift_thr_pausingstrength)+geom_boxplot(aes(y=`mean length shift`,x=sample,col=fraction,fill=tm_downstream_bins),outlier.shape=NA)+
  coord_cartesian(ylim=c(-25,50))+
  scale_colour_manual(values=c("red","black"))+
  scale_fill_brewer(palette = "GnBu")+
  geom_hline(yintercept = 0,col="red",linetype="dashed")+
  theme_classic()+
  ylab("mean length shift")+
  ggtitle("mean length shift - uncollapsed reads")

ggsave("mean_length_shift_uncollapsedreads_aggreps_pausingstrength.pdf")
## Saving 7 x 5 in image
#ints11 alone in NP
index1<-sample_index_to_meanlengthNumreads_index(1)
index2<-sample_index_to_meanlengthNumreads_index(2)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 194502, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#tfiis in NP
index1<-sample_index_to_meanlengthNumreads_index(1)
index2<-sample_index_to_meanlengthNumreads_index(3)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 186895, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 tfiis in NP
index1<-sample_index_to_meanlengthNumreads_index(1)
index2<-sample_index_to_meanlengthNumreads_index(4)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 61056, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 alone in CHR
index1<-sample_index_to_meanlengthNumreads_index(5)
index2<-sample_index_to_meanlengthNumreads_index(6)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 467499, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#tfiis in CHR
index1<-sample_index_to_meanlengthNumreads_index(5)
index2<-sample_index_to_meanlengthNumreads_index(7)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 1053894, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#ints11 tfiis in CHR
index1<-sample_index_to_meanlengthNumreads_index(5)
index2<-sample_index_to_meanlengthNumreads_index(8)
wilcox.test(merged_df[,index1],merged_df[,index2],paired = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  merged_df[, index1] and merged_df[, index2]
## V = 277252, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Median length heatmap and correlations

merged_df_medianlength<-merged_df[,seq(2,44,by = 6)]
colnames(merged_df_medianlength)<-names_agg
merged_df_medianlength<-merged_df_medianlength[order(apply(merged_df_medianlength,MARGIN=1,FUN=sum,na.rm=TRUE),
                                                           decreasing=FALSE),]
merged_df_medianlength$id<-rownames(merged_df_medianlength)
melted_df<-melt(merged_df_medianlength)
## Using id as id variables
melted_df$id<-factor(melted_df$id,levels=merged_df_medianlength$id)

ggplot(melted_df, aes(variable,id, fill=value)) + geom_raster() +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  scale_fill_gradientn(colors = c("blue","pink","red"))+
  ylab("motif-dependent piRNA loci")+xlab("condition")

cor(merged_df_medianlength[,1:8], use = "pairwise.complete.obs")
##                   N2_EV_np N2_ints11_np tfiis_EV_np tfiis_ints11_np N2_EV_chr
## N2_EV_np         1.0000000    0.3660523   0.4106685       0.2646039 0.3558378
## N2_ints11_np     0.3660523    1.0000000   0.4582556       0.5076408 0.3725152
## tfiis_EV_np      0.4106685    0.4582556   1.0000000       0.4021936 0.4746218
## tfiis_ints11_np  0.2646039    0.5076408   0.4021936       1.0000000 0.3870915
## N2_EV_chr        0.3558378    0.3725152   0.4746218       0.3870915 1.0000000
## N2_ints11_chr    0.2840870    0.3640658   0.3181022       0.4378607 0.4740678
## tfiis_EV_chr     0.3054843    0.4103333   0.5112650       0.4152849 0.5844457
## tfiis_ints11_chr 0.1672338    0.2898436   0.3796898       0.4070145 0.4064303
##                  N2_ints11_chr tfiis_EV_chr tfiis_ints11_chr
## N2_EV_np             0.2840870    0.3054843        0.1672338
## N2_ints11_np         0.3640658    0.4103333        0.2898436
## tfiis_EV_np          0.3181022    0.5112650        0.3796898
## tfiis_ints11_np      0.4378607    0.4152849        0.4070145
## N2_EV_chr            0.4740678    0.5844457        0.4064303
## N2_ints11_chr        1.0000000    0.4116580        0.3955375
## tfiis_EV_chr         0.4116580    1.0000000        0.4512372
## tfiis_ints11_chr     0.3955375    0.4512372        1.0000000
ggplot(merged_df_medianlength)+geom_point(aes(x=N2_EV_np,y=N2_EV_chr))
## Warning: Removed 5094 rows containing missing values (geom_point).

ggplot(merged_df_medianlength)+geom_point(aes(x=N2_ints11_np,y=N2_ints11_chr))
## Warning: Removed 5895 rows containing missing values (geom_point).

ggplot(merged_df_medianlength)+geom_point(aes(x=tfiis_EV_np,y=tfiis_EV_chr))
## Warning: Removed 5441 rows containing missing values (geom_point).

ggplot(merged_df_medianlength)+geom_point(aes(x=tfiis_ints11_np,y=N2_ints11_chr))
## Warning: Removed 6085 rows containing missing values (geom_point).

merged_df_medianlength_pausingstrength<-merge(merged_df_medianlength,pausing_strength_data,
                                              by.x=0,by.y="V4")
merged_df_medianlength_pausingstrength$tm_downstream_bins<-factor(merged_df_medianlength_pausingstrength$tm_downstream_bins,
                                                                levels = c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]",
                                                                           "(39.8,101]","(101,412]"))

ggplot(merged_df_medianlength_pausingstrength)+geom_point(aes(x=N2_EV_np,y=N2_EV_chr,col=tm_downstream_bins),
                                                          size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5094 rows containing missing values (geom_point).

ggsave("np_vs_chr_medianlength_N2_EV.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5094 rows containing missing values (geom_point).
ggplot(merged_df_medianlength_pausingstrength)+geom_point(aes(x=N2_ints11_np,y=N2_ints11_chr,col=tm_downstream_bins),
                                                          size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5895 rows containing missing values (geom_point).

ggsave("np_vs_chr_medianlength_N2_ints11RNAi.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5895 rows containing missing values (geom_point).
ggplot(merged_df_medianlength_pausingstrength)+geom_point(aes(x=tfiis_EV_np,y=tfiis_EV_chr,col=tm_downstream_bins),
                                                          size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5441 rows containing missing values (geom_point).

ggsave("np_vs_chr_medianlength_tfiis_EV.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5441 rows containing missing values (geom_point).
ggplot(merged_df_medianlength_pausingstrength)+geom_point(aes(x=tfiis_ints11_np,y=tfiis_ints11_chr,col=tm_downstream_bins),
                                                          size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5806 rows containing missing values (geom_point).

ggsave("np_vs_chr_medianlength_tfiis_ints11RNAi.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5806 rows containing missing values (geom_point).

Mean length heatmap and correlations

merged_df_meanlength<-merged_df[,seq(3,45,by = 6)]
colnames(merged_df_meanlength)<-names_agg
merged_df_meanlength<-merged_df_meanlength[order(apply(merged_df_meanlength,MARGIN=1,FUN=sum,na.rm=TRUE),
                                                           decreasing=FALSE),]
merged_df_meanlength$id<-rownames(merged_df_meanlength)
melted_df<-melt(merged_df_meanlength)
## Using id as id variables
melted_df$id<-factor(melted_df$id,levels=merged_df_meanlength$id)

ggplot(melted_df, aes(variable,id, fill=value)) + geom_raster() +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  scale_fill_gradientn(colors = c("blue","pink","red"))+
  ylab("motif-dependent piRNA loci")+xlab("condition")

cor(merged_df_meanlength[,1:8], use = "pairwise.complete.obs")
##                   N2_EV_np N2_ints11_np tfiis_EV_np tfiis_ints11_np N2_EV_chr
## N2_EV_np         1.0000000    0.3756494   0.4151220       0.2727414 0.3784628
## N2_ints11_np     0.3756494    1.0000000   0.4540211       0.5258158 0.3846776
## tfiis_EV_np      0.4151220    0.4540211   1.0000000       0.4002352 0.4712512
## tfiis_ints11_np  0.2727414    0.5258158   0.4002352       1.0000000 0.3808146
## N2_EV_chr        0.3784628    0.3846776   0.4712512       0.3808146 1.0000000
## N2_ints11_chr    0.3034803    0.3624034   0.3085957       0.4439244 0.4639374
## tfiis_EV_chr     0.3291791    0.4237846   0.5066498       0.4174960 0.5973806
## tfiis_ints11_chr 0.1885830    0.2966646   0.3699152       0.4099851 0.4102228
##                  N2_ints11_chr tfiis_EV_chr tfiis_ints11_chr
## N2_EV_np             0.3034803    0.3291791        0.1885830
## N2_ints11_np         0.3624034    0.4237846        0.2966646
## tfiis_EV_np          0.3085957    0.5066498        0.3699152
## tfiis_ints11_np      0.4439244    0.4174960        0.4099851
## N2_EV_chr            0.4639374    0.5973806        0.4102228
## N2_ints11_chr        1.0000000    0.4031160        0.3980147
## tfiis_EV_chr         0.4031160    1.0000000        0.4447199
## tfiis_ints11_chr     0.3980147    0.4447199        1.0000000
ggplot(merged_df_meanlength)+geom_point(aes(x=N2_EV_np,y=N2_EV_chr))+xlim(18,75)
## Warning: Removed 5094 rows containing missing values (geom_point).

ggplot(merged_df_meanlength)+geom_point(aes(x=N2_ints11_np,y=N2_ints11_chr))+xlim(18,75)
## Warning: Removed 5895 rows containing missing values (geom_point).

ggplot(merged_df_meanlength)+geom_point(aes(x=tfiis_EV_np,y=tfiis_EV_chr))+xlim(18,75)
## Warning: Removed 5441 rows containing missing values (geom_point).

ggplot(merged_df_meanlength)+geom_point(aes(x=tfiis_ints11_np,y=N2_ints11_chr))+xlim(18,75)
## Warning: Removed 6085 rows containing missing values (geom_point).

merged_df_meanlength_pausingstrength<-merge(merged_df_meanlength,pausing_strength_data,
                                              by.x=0,by.y="V4")
merged_df_meanlength_pausingstrength$tm_downstream_bins<-factor(merged_df_meanlength_pausingstrength$tm_downstream_bins,
                                                                levels = c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]",
                                                                           "(39.8,101]","(101,412]"))


ggplot(merged_df_meanlength_pausingstrength)+geom_point(aes(x=N2_EV_np,y=N2_EV_chr,col=tm_downstream_bins),
                                                        size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5094 rows containing missing values (geom_point).

ggsave("np_vs_chr_meanlength_N2_EV.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5094 rows containing missing values (geom_point).
ggplot(merged_df_meanlength_pausingstrength)+geom_point(aes(x=N2_ints11_np,y=N2_ints11_chr,col=tm_downstream_bins),
                                                        size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5895 rows containing missing values (geom_point).

ggsave("np_vs_chr_meanlength_N2_ints11RNAi.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5895 rows containing missing values (geom_point).
ggplot(merged_df_meanlength_pausingstrength)+geom_point(aes(x=tfiis_EV_np,y=tfiis_EV_chr,col=tm_downstream_bins),
                                                        size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5441 rows containing missing values (geom_point).

ggsave("np_vs_chr_meanlength_tfiis_EV.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5441 rows containing missing values (geom_point).
ggplot(merged_df_meanlength_pausingstrength)+geom_point(aes(x=tfiis_ints11_np,y=tfiis_ints11_chr,col=tm_downstream_bins),
                                                        size=0.5,stroke=0.5,alpha=0.5)+
  facet_grid(. ~ tm_downstream_bins)+theme_classic()+theme(aspect.ratio = 1)
## Warning: Removed 5806 rows containing missing values (geom_point).

ggsave("np_vs_chr_meanlength_tfiis_ints11RNAi.pdf")
## Saving 7 x 5 in image
## Warning: Removed 5806 rows containing missing values (geom_point).